{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "import numpy as np\n",
    "import matplotlib\n",
    "matplotlib.use('Agg')\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "class Sequence(nn.Module):\n",
    "    def __init__(self):\n",
    "        super(Sequence, self).__init__()\n",
    "        self.lstm1 = nn.LSTMCell(1, 51)\n",
    "#         self.lstm2 = nn.LSTMCell(51, 51)\n",
    "        self.linear = nn.Linear(51, 1)\n",
    "\n",
    "    def forward(self, input, future = 0):\n",
    "        outputs = []\n",
    "        h_t = torch.zeros(input.size(0), 51, dtype=torch.double)\n",
    "        c_t = torch.zeros(input.size(0), 51, dtype=torch.double)\n",
    "#         h_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)\n",
    "#         c_t2 = torch.zeros(input.size(0), 51, dtype=torch.double)\n",
    "\n",
    "        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):\n",
    "            h_t, c_t = self.lstm1(input_t, (h_t, c_t))\n",
    "            \n",
    "#             h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))\n",
    "            \n",
    "#             print(\"h_t2=\",h_t2)\n",
    "            output = self.linear(h_t)\n",
    "            outputs += [output]\n",
    "#             print(outputs)\n",
    "        for i in range(future):# if we should predict the future\n",
    "            h_t, c_t = self.lstm1(output, (h_t, c_t))\n",
    "#             h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))\n",
    "            output = self.linear(h_t)\n",
    "            outputs += [output]\n",
    "        outputs = torch.stack(outputs, 1).squeeze(2)\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "aa= [0 1 2 3 4 5 6 7 8 9]\n",
      "bb= [0 7 5 0]\n",
      "cc= [[0]\n",
      " [7]\n",
      " [5]\n",
      " [0]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],\n",
       "       [ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16],\n",
       "       [ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14],\n",
       "       [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9]], dtype=int64)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "np.random.seed(2)\n",
    "\n",
    "T = 2\n",
    "L = 10\n",
    "N = 4\n",
    "\n",
    "x = np.empty((N, L), 'int64')\n",
    "aa=np.array(range(L))\n",
    "bb=np.random.randint(-4 * T, 4 * T, N)\n",
    "cc=bb.reshape(N, 1)\n",
    "x[:] = aa +cc \n",
    "print(\"aa=\",aa)\n",
    "print('bb=',bb)\n",
    "print('cc=',cc)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.        ,  0.47942554,  0.84147098,  0.99749499,  0.90929743,\n",
       "         0.59847214,  0.14112001, -0.35078323, -0.7568025 , -0.97753012],\n",
       "       [-0.35078323, -0.7568025 , -0.97753012, -0.95892427, -0.70554033,\n",
       "        -0.2794155 ,  0.21511999,  0.6569866 ,  0.93799998,  0.98935825],\n",
       "       [ 0.59847214,  0.14112001, -0.35078323, -0.7568025 , -0.97753012,\n",
       "        -0.95892427, -0.70554033, -0.2794155 ,  0.21511999,  0.6569866 ],\n",
       "       [ 0.        ,  0.47942554,  0.84147098,  0.99749499,  0.90929743,\n",
       "         0.59847214,  0.14112001, -0.35078323, -0.7568025 , -0.97753012]])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "data = np.sin(x / 1.0 / T).astype('float64')\n",
    "torch.save(data, open(r'D:\\projects\\pytorch-examples\\time_sequence_prediction\\traindata.pt', 'wb'))\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 0.0000,  0.4794,  0.8415,  0.9975,  0.9093,  0.5985,  0.1411,\n",
      "         -0.3508, -0.7568]], dtype=torch.float64)\n",
      "tensor([[ 0.4794,  0.8415,  0.9975,  0.9093,  0.5985,  0.1411, -0.3508,\n",
      "         -0.7568, -0.9775]], dtype=torch.float64)\n"
     ]
    }
   ],
   "source": [
    "np.random.seed(0)\n",
    "torch.manual_seed(0)\n",
    "# load data and make training set\n",
    "data = torch.load(r'D:\\projects\\pytorch-examples\\time_sequence_prediction\\traindata.pt')\n",
    "input = torch.from_numpy(data[3:, :-1])\n",
    "target = torch.from_numpy(data[3:, 1:])\n",
    "print(input)\n",
    "print(target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.autograd import Function,Variable\n",
    "\n",
    "# Inherit from Function\n",
    "class Linear(Function):\n",
    "\n",
    "    # bias is an optional argument\n",
    "    def forward(self, input, weight, bias=None):\n",
    "        self.save_for_backward(input, weight, bias)\n",
    "        output = input.mm(weight.t())\n",
    "        if bias is not None:\n",
    "            output += bias.unsqueeze(0).expand_as(output)\n",
    "        return output\n",
    "\n",
    "    # This function has only a single output, so it gets only one gradient\n",
    "    def backward(self, grad_output):\n",
    "        # This is a pattern that is very convenient - at the top of backward\n",
    "        # unpack saved_tensors and initialize all gradients w.r.t. inputs to\n",
    "        # None. Thanks to the fact that additional trailing Nones are\n",
    "        # ignored, the return statement is simple even when the function has\n",
    "        # optional inputs.\n",
    "        input, weight, bias = self.saved_tensors\n",
    "        grad_input = grad_weight = grad_bias = None\n",
    "\n",
    "        # These needs_input_grad checks are optional and there only to\n",
    "        # improve efficiency. If you want to make your code simpler, you can\n",
    "        # skip them. Returning gradients for inputs that don't require it is\n",
    "        # not an error.\n",
    "        if self.needs_input_grad[0]:\n",
    "            grad_input = grad_output.mm(weight)\n",
    "        if self.needs_input_grad[1]:\n",
    "            grad_weight = grad_output.t().mm(input)\n",
    "        if bias is not None and self.needs_input_grad[2]:\n",
    "            grad_bias = grad_output.sum(0).squeeze(0)\n",
    "\n",
    "        return grad_input, grad_weight, grad_bias\n",
    "\n",
    "def linear(input, weight, bias=None):\n",
    "    # First braces create a Function object. Any arguments given here\n",
    "    # will be passed to __init__. Second braces will invoke the __call__\n",
    "    # operator, that will then use forward() to compute the result and\n",
    "    # return it.\n",
    "    return Linear()(input, weight, bias)\n",
    "\n",
    "\n",
    "class Linear(nn.Module):\n",
    "    def __init__(self, input_features, output_features, bias=True):\n",
    "        self.input_features = input_features\n",
    "        self.output_features = output_features\n",
    "\n",
    "        # nn.Parameter is a special kind of Variable, that will get\n",
    "        # automatically registered as Module's parameter once it's assigned\n",
    "        # as an attribute. Parameters and buffers need to be registered, or\n",
    "        # they won't appear in .parameters() (doesn't apply to buffers), and\n",
    "        # won't be converted when e.g. .cuda() is called. You can use\n",
    "        # .register_buffer() to register buffers.\n",
    "        # nn.Parameters can never be volatile and, different than Variables,\n",
    "        # they require gradients by default.\n",
    "        self.weight = nn.Parameter(torch.Tensor(input_features, output_features))\n",
    "        if bias:\n",
    "            self.bias = nn.Parameter(torch.Tensor(output_features))\n",
    "        else:\n",
    "            # You should always register all possible parameters, but the\n",
    "            # optional ones can be None if you want.\n",
    "            self.register_parameter('bias', None)\n",
    "\n",
    "        # Not a very smart way to initialize weights\n",
    "        self.weight.data.uniform_(-0.1, 0.1)\n",
    "        if bias is not None:\n",
    "            self.bias.data.uniform_(-0.1, 0.1)\n",
    "\n",
    "    def forward(self, input):\n",
    "        # See the autograd section for explanation of what happens here.\n",
    "        return Linear()(input, self.weight, self.bias)\n",
    "        #注意这个Linear是之前实现过的Linear\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "ename": "RuntimeError",
     "evalue": "mse_loss_forward is not implemented for type torch.IntTensor",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-39-7cbac0f1d836>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     30\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mVariable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtensor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m4\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     31\u001b[0m \u001b[0mcriterion\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0mmyMSELoss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 32\u001b[1;33m \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     33\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m    489\u001b[0m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    490\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 491\u001b[1;33m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    492\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    493\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-39-7cbac0f1d836>\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, input, target)\u001b[0m\n\u001b[0;32m     25\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     26\u001b[0m \u001b[1;31m#         _assert_no_grad(target)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 27\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mmse_loss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize_average\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize_average\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreduce\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     28\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     29\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mVariable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtensor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-39-7cbac0f1d836>\u001b[0m in \u001b[0;36mmse_loss\u001b[1;34m(input, target, size_average, reduce)\u001b[0m\n\u001b[0;32m     15\u001b[0m     \"\"\"\n\u001b[0;32m     16\u001b[0m     return _pointwise_loss(lambda a, b: (a - b) ** 2, torch._C._nn.mse_loss,\n\u001b[1;32m---> 17\u001b[1;33m                            input, target, size_average, reduce)\n\u001b[0m\u001b[0;32m     18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m \u001b[1;32mclass\u001b[0m \u001b[0mmyMSELoss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-39-7cbac0f1d836>\u001b[0m in \u001b[0;36m_pointwise_loss\u001b[1;34m(lambd, lambd_optimized, input, target, size_average, reduce)\u001b[0m\n\u001b[0;32m      7\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0md\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msize_average\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0md\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mlambd_optimized\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize_average\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     10\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmse_loss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize_average\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mRuntimeError\u001b[0m: mse_loss_forward is not implemented for type torch.IntTensor"
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "# import torch.nn.functional as F\n",
    "def _pointwise_loss(lambd, lambd_optimized, input, target, size_average=True, reduce=True):\n",
    "    if target.requires_grad:\n",
    "        d = lambd(input, target)\n",
    "        if not reduce:\n",
    "            return d\n",
    "        return torch.mean(d) if size_average else torch.sum(d)\n",
    "    else:\n",
    "        return lambd_optimized(input, target, size_average, reduce)\n",
    "    \n",
    "def mse_loss(input, target, size_average=True, reduce=True):\n",
    "    r\"\"\"mse_loss(input, target, size_average=True, reduce=True) -> Tensor\n",
    "    Measures the element-wise mean squared error.\n",
    "    See :class:`~torch.nn.MSELoss` for details.\n",
    "    \"\"\"\n",
    "    return _pointwise_loss(lambda a, b: (a - b) ** 2, torch._C._nn.mse_loss,\n",
    "                           input, target, size_average, reduce)\n",
    "\n",
    "class myMSELoss(nn.Module):\n",
    "    def __init__(self, size_average=True, reduce=True):\n",
    "        super(myMSELoss, self).__init__()\n",
    "        self.size_average = size_average\n",
    "        self.reduce = reduce\n",
    "\n",
    "    def forward(self, input, target):\n",
    "#         _assert_no_grad(target)\n",
    "        return mse_loss(input, target, size_average=self.size_average, reduce=self.reduce)\n",
    "\n",
    "out=Variable(torch.tensor(np.array([1,2])))\n",
    "target=Variable(torch.tensor(np.array([3,4])))\n",
    "criterion =myMSELoss()\n",
    "loss = criterion(out, target)\n",
    "loss.backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "not enough values to unpack (expected 2, got 0)",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-45-2ae4ef29b49f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     26\u001b[0m \u001b[0mcriterion\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0mLinear\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     27\u001b[0m \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-45-2ae4ef29b49f>\u001b[0m in \u001b[0;36mbackward\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m         \u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msaved_tensors\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     10\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m*\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 2, got 0)"
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "from torch.autograd import Function,Variable\n",
    "\n",
    "class FLinear(Function):\n",
    "    def __init__(self, input, target):\n",
    "        self.save_for_backward(input, target)        \n",
    "#         return (input-target)**2\n",
    "    \n",
    "    def backward(self):\n",
    "        input, target = self.saved_tensors       \n",
    "        return 2*(input-target)\n",
    "\n",
    "def linear(input, target):\n",
    "    return FLinear()(input, target)\n",
    "\n",
    "\n",
    "class Linear(nn.Module):\n",
    "    def __init__(self):\n",
    "        pass\n",
    "    def forward(self, input,target):\n",
    "        # See the autograd section for explanation of what happens here.\n",
    "        return FLinear(input, target)\n",
    "        #注意这个Linear是之前实现过的Linear\n",
    "\n",
    "out=Variable(torch.tensor(np.array([1,2])))\n",
    "target=Variable(torch.tensor(np.array([3,4])))\n",
    "criterion =Linear()\n",
    "loss = criterion.forward(out, target)\n",
    "loss.backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "STEP:  0\n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name '_pointwise_loss' is not defined",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-28-c98e6d615ad6>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     52\u001b[0m         \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 54\u001b[1;33m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-28-c98e6d615ad6>\u001b[0m in \u001b[0;36mf\u001b[1;34m()\u001b[0m\n\u001b[0;32m     26\u001b[0m             \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     27\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 28\u001b[1;33m         \u001b[0moptimizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclosure\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     29\u001b[0m         \u001b[1;31m# begin to predict, no need to track gradient here\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     30\u001b[0m         \u001b[1;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\torch\\optim\\lbfgs.py\u001b[0m in \u001b[0;36mstep\u001b[1;34m(self, closure)\u001b[0m\n\u001b[0;32m    101\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    102\u001b[0m         \u001b[1;31m# evaluate initial f(x) and df/dx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 103\u001b[1;33m         \u001b[0morig_loss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mclosure\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    104\u001b[0m         \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0morig_loss\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    105\u001b[0m         \u001b[0mcurrent_evals\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-28-c98e6d615ad6>\u001b[0m in \u001b[0;36mclosure\u001b[1;34m()\u001b[0m\n\u001b[0;32m     22\u001b[0m \u001b[1;31m#             print(input.size(0))\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     23\u001b[0m             \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mseq\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 24\u001b[1;33m             \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     25\u001b[0m \u001b[1;31m#             print('loss:', loss.item())\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     26\u001b[0m             \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Anaconda3\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m    489\u001b[0m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_slow_forward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    490\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 491\u001b[1;33m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    492\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mhook\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_forward_hooks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    493\u001b[0m             \u001b[0mhook_result\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mhook\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-27-09377d2f5f6e>\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, input, target)\u001b[0m\n\u001b[0;32m     19\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     20\u001b[0m \u001b[1;31m#         _assert_no_grad(target)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 21\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mmse_loss\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize_average\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize_average\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreduce\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-27-09377d2f5f6e>\u001b[0m in \u001b[0;36mmse_loss\u001b[1;34m(input, target, size_average, reduce)\u001b[0m\n\u001b[0;32m      8\u001b[0m     \u001b[0mSee\u001b[0m \u001b[1;33m:\u001b[0m\u001b[1;32mclass\u001b[0m\u001b[1;33m:\u001b[0m\u001b[0;31m`\u001b[0m\u001b[1;33m~\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMSELoss\u001b[0m\u001b[0;31m`\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mdetails\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      9\u001b[0m     \"\"\"\n\u001b[1;32m---> 10\u001b[1;33m     return _pointwise_loss(lambda a, b: (a - b) ** 2, torch._C._nn.mse_loss,\n\u001b[0m\u001b[0;32m     11\u001b[0m                            input, target, size_average, reduce)\n\u001b[0;32m     12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name '_pointwise_loss' is not defined"
     ],
     "output_type": "error"
    }
   ],
   "source": [
    "def f():\n",
    "    np.random.seed(0)\n",
    "    torch.manual_seed(0)\n",
    "    # load data and make training set\n",
    "    data = torch.load(r'D:\\projects\\pytorch-examples\\time_sequence_prediction\\traindata.pt')\n",
    "    input = torch.from_numpy(data[3:, :-1])\n",
    "    target = torch.from_numpy(data[3:, 1:])\n",
    "    test_input = torch.from_numpy(data[:3, :-1])\n",
    "    test_target = torch.from_numpy(data[:3, 1:])\n",
    "    # build the model\n",
    "    seq = Sequence()\n",
    "    seq.double()\n",
    "#     criterion =nn.MSELoss()\n",
    "    criterion =myMSELoss()\n",
    "    # use LBFGS as optimizer since we can load the whole data to train\n",
    "    optimizer = optim.LBFGS(seq.parameters(), lr=0.8)\n",
    "    #begin to train\n",
    "    for i in range(10):\n",
    "        print('STEP: ', i)\n",
    "        def closure():\n",
    "            optimizer.zero_grad()\n",
    "#             print(input.size(0))\n",
    "            out = seq(input)\n",
    "            loss = criterion(out, target)\n",
    "#             print('loss:', loss.item())\n",
    "            loss.backward()\n",
    "            return loss\n",
    "        optimizer.step(closure)\n",
    "        # begin to predict, no need to track gradient here\n",
    "        with torch.no_grad():\n",
    "            future = 100\n",
    "            pred = seq(test_input, future=future)\n",
    "            loss = criterion(pred[:, :-future], test_target)\n",
    "#             print('test loss:', loss.item())\n",
    "            y = pred.detach().numpy()\n",
    "        # draw the result\n",
    "        print(\"here1\")\n",
    "        plt.figure(figsize=(30,10))\n",
    "        plt.title('Predict future values for time sequences\\n(Dashlines are predicted values)', fontsize=30)\n",
    "        plt.xlabel('x', fontsize=20)\n",
    "        plt.ylabel('y', fontsize=20)\n",
    "        plt.xticks(fontsize=20)\n",
    "        plt.yticks(fontsize=20)\n",
    "        def draw(yi, color):\n",
    "            plt.plot(np.arange(input.size(1)), yi[:input.size(1)], color, linewidth = 2.0)\n",
    "            plt.plot(np.arange(input.size(1), input.size(1) + future), yi[input.size(1):], color + ':', linewidth = 2.0)\n",
    "        draw(y[0], 'r')\n",
    "#         draw(y[1], 'g')\n",
    "#         draw(y[2], 'b')\n",
    "        plt.savefig(r'D:\\projects\\pytorch-examples\\time_sequence_prediction\\predict%d.pdf'%i)\n",
    "        plt.show()\n",
    "        plt.close()\n",
    "        \n",
    "f()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[ 0.4520, -0.6208,  1.0974,  0.5748],\n",
      "        [-0.8689, -0.4601, -2.0998, -0.1178],\n",
      "        [-0.4503,  0.9647,  1.0128,  0.4169]])\n",
      "tensor([[ 0.2566,  1.2857,  0.4050, -0.4893],\n",
      "        [ 0.2160, -1.0855, -0.9608,  1.4725],\n",
      "        [-0.3604, -1.3850, -0.9320,  0.7884]])\n",
      "tensor(1.6774)\n",
      "torch.Size([3, 4]) torch.Size([3, 4]) torch.Size([])\n"
     ]
    }
   ],
   "source": [
    "loss_fn = torch.nn.MSELoss(reduce=True, size_average=True)\n",
    "input = torch.autograd.Variable(torch.randn(3,4))\n",
    "target = torch.autograd.Variable(torch.randn(3,4))\n",
    "loss = loss_fn(input, target)\n",
    "print(input); print(target); print(loss)\n",
    "print(input.size(), target.size(), loss.size())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
